This document contains analysis of differences in microbial alpha diversity between healthy samples from American Gut Project and samples with diagnosed Inflammatory bowel syndrome (Ulcerative colitis and Crohn’s disease) and Clostridium difficile infection, as well as samples after fecal microbiota transplantation.

The aim of this analysis is to show that there are significant differences between microbiome alpha diversity in healthy and disease samples. Furthermore, we want to show that fecal transplantation improves alpha diversity in short- and long-term.

Loading libraries:

#install.packages("readr")
library("readr")
#install.packages('data.table')
library(data.table)
library(stringr)
#install.packages('ggplot2')
library(ggplot2)
library(gridExtra)
library(grid)
library(plyr)
library(purrr)
library(dplyr)
#install.packages('flextable')
library(flextable)
library(tibble)
library(ComplexHeatmap)
library(RColorBrewer)
library(ggplotify)
library(here)
library(tableone)

#install.packages("table1")
library(table1)

Load healthy subsample of AGP data:

# Load healthy samples' table
all_healthy <- read.csv(here("01_tidy_data", "AGP_healthy.csv.gz"), header = TRUE, sep = ",")

all_healthy <- dplyr::select(all_healthy, sample_id, shannon_entropy, chao1, menhinick, margalef, fisher_alpha, simpson, pielou_evenness, gini_index, strong, simpson, faith_pd, sex, race, host_age, condition)

names(all_healthy)[names(all_healthy) == 'host_age'] <- 'age'

Load Inflammatory Bowel Disease data:

IBD <- read.csv(here("01_tidy_data", "IBD.csv.gz"), header = TRUE, sep = ",")

Load Ulcerative colitis data:

UC <- read.csv(here("01_tidy_data", "UC.csv.gz"), header = TRUE, sep = ",")

Load Longitudinal Chron’s disease study data:

CD <- read.csv(here("01_tidy_data", "CD_2.csv.gz"), header = TRUE, sep = ",")

Load CDI data from NCBI data

CDI <- read.csv(here("01_tidy_data", "ncbi_CDI.csv.gz"), header = TRUE, sep = ",")

Load Changes following fecal microbial transplantation for recurrent CDI data:

CDI_FMT <- read.csv(here("01_tidy_data", "CDI_FMT.csv.gz"), header = TRUE, sep = ",")

Load Fecal transplant - CDI with underlying IBD data:

FMT_IBD_CDI <- read.csv(here("01_tidy_data", "FMT_IBD_CDI.csv.gz"), header = TRUE, sep = ",")

Load Hospital Clinic’s CDI data

hospital_CDI <- read.csv(here("01_tidy_data", "hosp_CDI.csv.gz"), header = TRUE, sep = ",")

Let’s define vector of names of the alpha diversity metrics that are going to be analysed:

metric <- c("chao1", "margalef", "menhinick", "fisher_alpha", "faith_pd", "gini_index", "strong", "pielou_evenness", "shannon_entropy", "simpson") 

Table 1

The summary of the data contained in all datasets (treating all metrics as not normally distributed):

all_data <- rbind.fill(all_healthy, IBD, UC, CD, CDI, CDI_FMT, FMT_IBD_CDI)
  
all_data <- dplyr::select(all_data, shannon_entropy, chao1, menhinick, margalef, fisher_alpha, simpson, gini_index, strong, pielou_evenness, faith_pd, condition)

all_data$condition[all_data$condition=="control"] <- "healthy"
all_data$condition[all_data$condition=="crohns"] <- "CD"
all_data$condition[all_data$condition=="Cdif"] <- "CDI"
all_data <- all_data %>% filter (condition != "Donors")

table_one <- CreateTableOne(data = all_data) 
print(table_one, nonnormal = metric)
##                                 
##                                  Overall                
##   n                                2294                 
##   shannon_entropy (median [IQR])   4.49 [3.60, 5.25]    
##   chao1 (median [IQR])           172.14 [121.51, 228.93]
##   menhinick (median [IQR])         1.95 [1.35, 2.53]    
##   margalef (median [IQR])         16.38 [11.41, 21.25]  
##   fisher_alpha (median [IQR])     26.74 [17.32, 36.79]  
##   simpson (median [IQR])           0.90 [0.81, 0.94]    
##   gini_index (median [IQR])        1.00 [1.00, 1.00]    
##   strong (median [IQR])            0.69 [0.65, 0.74]    
##   pielou_evenness (median [IQR])   0.64 [0.54, 0.71]    
##   faith_pd (median [IQR])         16.81 [12.85, 21.06]  
##   condition (%)                                         
##      CD                             319 (13.9)          
##      CDI                            100 ( 4.4)          
##      CDI + CD                         6 ( 0.3)          
##      CDI + UC                         6 ( 0.3)          
##      healthy                       1823 (79.5)          
##      UC                              40 ( 1.7)
all_data$condition <- as.factor(all_data$condition)
all_data$condition <- ordered(all_data$condition, levels = c("healthy", "CD", "UC", "CDI", "CDI + CD", "CDI + UC"))
table1(~ chao1 + margalef + menhinick + fisher_alpha + faith_pd + gini_index + strong + pielou_evenness + shannon_entropy + simpson | condition, data=all_data)
healthy
(N=1823)
CD
(N=319)
UC
(N=40)
CDI
(N=100)
CDI + CD
(N=6)
CDI + UC
(N=6)
Overall
(N=2294)
chao1
Mean (SD) 194 (73.6) 128 (68.8) 106 (55.3) 61.6 (38.7) 54.5 (38.2) 70.5 (53.7) 177 (80.1)
Median [Min, Max] 188 [20.0, 516] 124 [8.00, 361] 103 [39.0, 318] 52.4 [7.00, 199] 43.2 [17.0, 119] 63.3 [17.2, 137] 172 [7.00, 516]
margalef
Mean (SD) 18.1 (6.62) 12.3 (6.55) 10.8 (5.20) 6.33 (3.63) 4.95 (3.21) 6.91 (4.99) 16.6 (7.21)
Median [Min, Max] 17.6 [2.11, 42.9] 12.7 [0.869, 28.2] 10.5 [4.16, 29.0] 5.55 [0.686, 20.1] 4.31 [1.25, 10.5] 6.62 [2.00, 13.1] 16.4 [0.686, 42.9]
menhinick
Mean (SD) 2.15 (0.805) 1.42 (0.800) 1.59 (1.02) 0.803 (0.551) 0.742 (0.469) 1.03 (0.729) 1.97 (0.878)
Median [Min, Max] 2.08 [0.269, 5.18] 1.41 [0.108, 4.05] 1.44 [0.537, 6.36] 0.626 [0.0885, 2.96] 0.648 [0.201, 1.55] 0.986 [0.310, 1.94] 1.95 [0.0885, 6.36]
fisher_alpha
Mean (SD) 30.9 (13.9) 19.6 (11.9) 17.8 (12.4) 9.08 (6.46) 6.95 (5.22) 10.5 (8.44) 28.0 (14.6)
Median [Min, Max] 29.2 [2.50, 90.9] 19.5 [1.02, 50.9] 16.2 [5.34, 75.8] 7.33 [0.778, 36.7] 5.68 [1.44, 16.3] 9.77 [2.38, 21.4] 26.7 [0.778, 90.9]
faith_pd
Mean (SD) 18.0 (5.37) 14.8 (6.58) 14.3 (10.4) 6.22 (3.14) 6.71 (3.10) 7.85 (3.47) 16.9 (6.19)
Median [Min, Max] 17.6 [3.92, 36.0] 14.0 [2.82, 40.1] 11.3 [6.06, 68.0] 5.36 [1.30, 17.2] 6.04 [2.88, 11.9] 7.63 [4.52, 12.2] 16.8 [1.30, 68.0]
gini_index
Mean (SD) 0.999 (0.00160) 0.996 (0.00513) 0.983 (0.00986) 0.987 (0.00661) 0.994 (0.00285) 0.992 (0.00466) 0.998 (0.00447)
Median [Min, Max] 1.00 [0.991, 1.00] 0.997 [0.962, 1.00] 0.983 [0.950, 0.996] 0.988 [0.966, 0.998] 0.994 [0.990, 0.998] 0.993 [0.984, 0.997] 1.00 [0.950, 1.00]
strong
Mean (SD) 0.697 (0.0723) 0.710 (0.0646) 0.660 (0.0622) 0.689 (0.0726) 0.713 (0.0421) 0.686 (0.0797) 0.697 (0.0714)
Median [Min, Max] 0.685 [0.501, 0.955] 0.707 [0.520, 0.861] 0.649 [0.577, 0.832] 0.694 [0.475, 0.887] 0.714 [0.670, 0.786] 0.696 [0.573, 0.777] 0.688 [0.475, 0.955]
pielou_evenness
Mean (SD) 0.615 (0.142) 0.579 (0.126) 0.648 (0.102) 0.564 (0.127) 0.499 (0.123) 0.600 (0.0922) 0.608 (0.139)
Median [Min, Max] 0.650 [0.0367, 0.841] 0.599 [0.174, 0.832] 0.676 [0.340, 0.770] 0.574 [0.104, 0.792] 0.549 [0.287, 0.611] 0.601 [0.491, 0.712] 0.642 [0.0367, 0.841]
shannon_entropy
Mean (SD) 4.47 (1.22) 3.83 (1.23) 4.15 (0.943) 3.19 (0.975) 2.71 (0.961) 3.22 (0.820) 4.31 (1.25)
Median [Min, Max] 4.70 [0.187, 7.02] 3.97 [0.679, 6.12] 4.41 [1.92, 5.70] 3.25 [0.431, 5.61] 2.94 [1.18, 3.60] 3.29 [2.12, 4.48] 4.49 [0.187, 7.02]
simpson
Mean (SD) 0.850 (0.160) 0.815 (0.144) 0.867 (0.0924) 0.764 (0.154) 0.708 (0.178) 0.810 (0.0781) 0.841 (0.158)
Median [Min, Max] 0.907 [0.0309, 0.984] 0.857 [0.226, 0.976] 0.905 [0.575, 0.961] 0.801 [0.110, 0.960] 0.756 [0.403, 0.875] 0.815 [0.679, 0.906] 0.898 [0.0309, 0.984]
#table_test <- data.frame(table1(~ chao1 + margalef + menhinick + fisher_alpha + faith_pd + gini_index + strong + pielou_evenness + shannon_entropy + simpson | condition, data=all_data))

Let’s define function for plotting violin plots that we are going to use in the whole analysis:

plot_violin <- function(df, column){
  violin <- vector('list', length(metric))
  
  for (i in 1:length(metric)){
    mean_line <- df %>% dplyr::group_by(.data[[column]]) %>% dplyr::summarise(grp_mean=mean(.data[[metric[i]]]))
    plot_data <- df %>% dplyr::group_by(.data[[column]]) %>% dplyr::mutate(m = mean(.data[[metric[i]]])) 
  
    violin[[i]] <- plot_data %>% ggplot(aes(x = .data[[metric[i]]], y = reorder(.data[[column]], -m), color = .data[[column]], fill = .data[[column]])) +
      geom_violin()+
      geom_boxplot(width=0.1, color="grey", alpha=0.2) +
      geom_vline(data = mean_line, aes(xintercept = grp_mean, color = .data[[column]]), linetype = "dashed")+ 
      xlab(if (metric[i] == "shannon_entropy"){"Shannon entropy (❋)"} else if (metric[i] =="chao1"){"Chao1 (+)"} else if (metric[i] == "menhinick"){"Menhinick (+)"} else if (metric[i] == "margalef"){"Margalef (+)"} else if (metric[i] == "fisher_alpha"){"Fisher alpha (+)"} else if (metric[i] == "simpson"){"Simpson (❋)"} else if (metric[i] == "gini_index"){"Gini index (x)"} else if (metric[i] == "strong"){"Strong dominance (x)"} else if (metric[i] == "pielou_evenness"){"Pielou evenness (x)"} else if (metric[i] == "faith_pd"){"Faith PD (+)"}) +
      ylab("")+
      theme(legend.position="none", axis.title.x = element_text(size=10))
  }
  return(violin)
}

Function for doing Mann-Whitney-Wilcoxon test:

do_wilcox_test <- function(df, column){
  test <- list()
  
  for (i in 1:length(metric)){
    test[[i]] <- pairwise.wilcox.test(df[[metric[i]]], df[[column]], p.adjust.method="none") %>% 
    broom::tidy() %>% add_column(parameter = metric[i], .before='group1')
    #test[[i]]$p.value <- round(test[[i]]$p.value, digits = 16)
  }
  
  tests <- do.call(what = rbind, args = test)
  
  return(tests)
}
label_fun <- function(x){
  if (x == "shannon_entropy"){y <- "Shannon entropy (❋)"} else if (x =="chao1"){y <- "Chao1 (+)"} else if (x == "menhinick"){y <- "Menhinick (+)"} else if (x == "margalef"){y <- "Margalef (+)"} else if (x == "fisher_alpha"){y <- "Fisher alpha (+)"} else if (x == "simpson"){y <- "Simpson (❋)"} else if (x == "gini_index"){y <- "Gini index (x)"} else if (x == "strong"){y <- "Strong dominance (x)"} else if (x == "pielou_evenness"){y <- "Pielou evenness (x)"} else if (x == "faith_pd"){y <- "Faith PD (+)"}
  return(y)
}

IBD and UC samples

Distributions of metrics in disease datasets

table(IBD$condition)
## 
## CD UC 
## 26  7
table(UC$condition)
## 
## UC 
## 33
#merge two datasets
UC_check <- UC 
UC_check$condition <- "UC_2"
IBD_check <-  rbind.fill(IBD, UC_check)

IBD_check$condition <- as.factor(IBD_check$condition)

table(IBD_check$condition)
## 
##   CD   UC UC_2 
##   26    7   33
nrow(IBD_check)
## [1] 66
violin_IBD_check <- vector('list', length(metric))

# Use violin function
violin_IBD_check <- plot_violin(IBD_check, "condition")

grid.arrange(violin_IBD_check[[1]], violin_IBD_check[[2]],violin_IBD_check[[3]], violin_IBD_check[[4]],violin_IBD_check[[5]], violin_IBD_check[[6]],violin_IBD_check[[7]], violin_IBD_check[[8]],violin_IBD_check[[9]], violin_IBD_check[[10]], ncol=4, top = textGrob("Distributions of 1) Shannon's index  2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in IBD datasets", gp=gpar(fontsize=10,font=2))) 

Mann-Whitney-Wilcoxon Test

tests_IBD_check <- list()

tests_IBD_check <- do_wilcox_test(IBD_check, "condition")

table <- tests_IBD_check %>% 
  # add_column(p.adjusted = round(p.adjust(tests_IBD_check$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(tests_IBD_check$p.value, "fdr"), .after='p.value') %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions")

table

Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions

parameter

group1

group2

p.value

p.adjusted

chao1

UC

CD

0.3989304427291079

0.854850948705231

chao1

UC_2

CD

0.0859103234105194

0.262411072607122

chao1

UC_2

UC

0.0747352973359165

0.262411072607122

margalef

UC

CD

0.9473207248234906

0.948538967726954

margalef

UC_2

CD

0.2023174837914895

0.505793709478724

margalef

UC_2

UC

0.2854936636483690

0.658831531496236

menhinick

UC

CD

0.9473207248234906

0.948538967726954

menhinick

UC_2

CD

0.0000012473417812

0.000012473417812

menhinick

UC_2

UC

0.0004241284287606

0.003180963215705

fisher_alpha

UC

CD

0.9473207248234906

0.948538967726954

fisher_alpha

UC_2

CD

0.0233613123501194

0.100119910071940

fisher_alpha

UC_2

UC

0.0874703575357072

0.262411072607122

faith_pd

UC

CD

0.9485389677269542

0.948538967726954

faith_pd

UC_2

CD

0.0000000001708668

0.000000005126004

faith_pd

UC_2

UC

0.0000004291025963

0.000006436538944

gini_index

UC

CD

0.8128574398040470

0.948538967726954

gini_index

UC_2

CD

0.5192607951497228

0.916342579675981

gini_index

UC_2

UC

0.6758943034484832

0.948538967726954

strong

UC

CD

0.8128574398040470

0.948538967726954

strong

UC_2

CD

0.0015417419155128

0.009250451493077

strong

UC_2

UC

0.0081046752873378

0.040523376436689

pielou_evenness

UC

CD

0.5033899431841589

0.916342579675981

pielou_evenness

UC_2

CD

0.1415440460724180

0.386029216561140

pielou_evenness

UC_2

UC

0.7016064528448431

0.948538967726954

shannon_entropy

UC

CD

0.9485389677269542

0.948538967726954

shannon_entropy

UC_2

CD

0.6879858286252339

0.948538967726954

shannon_entropy

UC_2

UC

0.8345801982024893

0.948538967726954

simpson

UC

CD

0.5033899431841589

0.916342579675981

simpson

UC_2

CD

0.9215635369220484

0.948538967726954

simpson

UC_2

UC

0.8618588939022376

0.948538967726954

Based on Mann-Whitney-Wilcoxon test, alpha diversity of UC samples from second study and CD are significantly different for Menhinick’s index, Fisher’s alpha (?), Faith’s PD and Strong’s evenness. Also UC samples from first and second study are significantly different for Menhinick’s index, Faith’s PD and Strong’s evenness. We can look on these samples as one population of IBD. It seems that UC samples from second study expres lower richness and evenness that the ones (7) from the first study.

Healthy samples vs IBD samples

#merge two datasets
healthy_disease <-  rbind.fill(all_healthy, IBD, UC)

healthy_disease$condition <- as.factor(healthy_disease$condition)
healthy_disease$condition <- relevel(healthy_disease$condition, "healthy")

table(healthy_disease$condition)
## 
## healthy      CD      UC 
##    1470      26      40

Explore differences in distribution shape and mean values of groups with different conditions by ploting violin plot.

violin_IBD <- vector('list', length(metric))

# Use violin function
violin_IBD <- plot_violin(healthy_disease, "condition")

grid.arrange(violin_IBD[[1]], violin_IBD[[2]],violin_IBD[[3]], violin_IBD[[4]],violin_IBD[[5]], violin_IBD[[6]],violin_IBD[[7]], violin_IBD[[8]],violin_IBD[[9]], violin_IBD[[10]], ncol=4) 

Mann-Whitney-Wilcoxon Test

Test whether different conditions separate into distinct distributions with Mann-Whitney-Wilcoxon test:

tests_IBD <- list()

tests_IBD <- do_wilcox_test(healthy_disease, "condition")

table1 <- tests_IBD %>% 
  # add_column(p.adjusted = round(p.adjust(tests_IBD$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(tests_IBD$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1=="CD") %>%
  filter(group2=="healthy") %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions")

table2 <- tests_IBD %>% 
  # add_column(p.adjusted = round(p.adjust(tests_IBD$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(tests_IBD$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1=="UC") %>%
  filter(group2=="healthy") %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions")


table1

Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions

parameter

group1

group2

p.value

p.adjusted

gini_index

CD

healthy

0.000000000000000002104898

0.00000000000000003157347

chao1

CD

healthy

0.000000003145739791286853

0.00000001572869895643426

margalef

CD

healthy

0.000000044123603489938707

0.00000016961554455053853

strong

CD

healthy

0.000000093943371912849768

0.00000028183011573854928

faith_pd

CD

healthy

0.000004317708939790003593

0.00001079427234947500898

fisher_alpha

CD

healthy

0.000067588438353935822740

0.00014483236790129104098

pielou_evenness

CD

healthy

0.008314214349516924409955

0.01558915190534423261814

menhinick

CD

healthy

0.071993652141507089026184

0.10799047821226062660038

shannon_entropy

CD

healthy

0.149845365558169796305066

0.20433458939750426264226

simpson

CD

healthy

0.753569252121400134925011

0.76938809982443590040901

library(writexl)
CD_table <- tests_IBD %>% 
  # add_column(p.adjusted = round(p.adjust(tests_IBD$p.value, "fdr"), digits=16), .after='p.value') %>%ç
  add_column(p.adjusted = p.adjust(tests_IBD$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1=="CD") %>%
  filter(group2=="healthy") 
write_xlsx(CD_table, here("03_plots_and_tables", "CD_AGP_table.xlsx"))

There is significant difference between healthy samples and CD samples in regard to Faith PD, Gini index, Chao1, Margalef’s index and Fisher alpha. No significant difference was detected for Pielou evenness, Menhinick, Strong, Shannon entropy and Simpson’s index.

table2

Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions

parameter

group1

group2

p.value

p.adjusted

gini_index

UC

healthy

0.000000000000000000000000003272936

0.00000000000000000000000009818809

chao1

UC

healthy

0.000000000000013954589620631019591

0.00000000000013954589620631020222

margalef

UC

healthy

0.000000000001414657646816701040448

0.00000000001060993235112525719751

fisher_alpha

UC

healthy

0.000000000009568323701783059584570

0.00000000005740994221069835750742

faith_pd

UC

healthy

0.000000045230811880143609889116805

0.00000016961554455053852881238189

menhinick

UC

healthy

0.000000080530680442876098703677596

0.00000026843560147625366234559199

strong

UC

healthy

0.000715720533566182757698181937656

0.00143144106713236551539636387531

shannon_entropy

UC

healthy

0.018938558066333596036079356395021

0.03156426344388933019624587927865

pielou_evenness

UC

healthy

0.204764058792080400062118883397488

0.26708355494619184788973598188022

simpson

UC

healthy

0.487885401032781773622559740033466

0.56294469349936349100715915483306

UC_table<- tests_IBD %>% 
  # add_column(p.adjusted = round(p.adjust(tests_IBD$p.value, "fdr"), digits=5), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(tests_IBD$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1=="UC") %>%
  filter(group2=="healthy")
write_xlsx(UC_table, here("03_plots_and_tables", "UC_AGP_table.xlsx"))

It seems like there is significant difference between healthy samples and UC samples in all alpha metrics except from Shannon entropy, Strong’s evennes, Simpsons index and Pielou evenness (almost all of the “evenness” metrics except from Gini). “Richness” indices are all signifficantly different.

Now lets see which parameters show significant difference between healthy and all IBD samples:

wilcox_healthy_disease <- healthy_disease %>%
  summarise(Chao1 = wilcox.test(chao1[condition == "healthy"], chao1[condition != "healthy"])$p.value,
            Margalef = wilcox.test(margalef[condition == "healthy"], margalef[condition != "healthy"])$p.value,
            Menhinick = wilcox.test(menhinick[condition == "healthy"], menhinick[condition != "healthy"])$p.value,
            Fisher = wilcox.test(fisher_alpha[condition == "healthy"], fisher_alpha[condition != "healthy"])$p.value,
            Faith = wilcox.test(faith_pd[condition == "healthy"], faith_pd[condition != "healthy"])$p.value,
            Gini = wilcox.test(gini_index[condition == "healthy"], gini_index[condition != "healthy"])$p.value,
            Strong = wilcox.test(strong[condition == "healthy"], strong[condition != "healthy"])$p.value,
            Pielou = wilcox.test(pielou_evenness[condition == "healthy"], pielou_evenness[condition != "healthy"])$p.value,
            Shannon = wilcox.test(shannon_entropy[condition == "healthy"], shannon_entropy[condition != "healthy"])$p.value,
            Sipson = wilcox.test(simpson[condition == "healthy"], simpson[condition != "healthy"])$p.value) 

wilcox_healthy_disease <- t(wilcox_healthy_disease)
colnames(wilcox_healthy_disease) <- c("p.value")
wilcox_healthy_disease <- data.frame(alpha_metric = row.names(wilcox_healthy_disease), wilcox_healthy_disease)
wilcox_healthy_disease$p.value <- round(wilcox_healthy_disease$p.value, digits = 5)

wilcox_healthy_disease %>%
  add_column(p.adjusted = round(p.adjust(wilcox_healthy_disease$p.value, "fdr"), digits = 5), .after='p.value') %>%
  arrange(p.value)  %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
  bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of alpha diversity indices between healthy and IBD samples")

Results of the Mann-Whitney-Wilcoxon test for distributions of alpha diversity indices between healthy and IBD samples

alpha_metric

p.value

p.adjusted

Chao1

0.00000

0.00000

Margalef

0.00000

0.00000

Fisher

0.00000

0.00000

Gini

0.00000

0.00000

Strong

0.00000

0.00000

Menhinick

0.00247

0.00412

Shannon

0.00686

0.00980

Pielou

0.00896

0.01120

Faith

0.16918

0.18798

Sipson

0.46537

0.46537

There is significant difference between healthy samples and those that are not healthy in all alpha metrics except from Pielou evenness, Simpson’s index and Strong’s evenness.

Kruskal-Wallis Test

Kruskal-Wallis Testis a non-parametric method for testing whether samples originate from the same distribution. It is used for comparing two or more independent samples of equal or different sample sizes. It extends the Mann–Whitney U test, which is used for comparing only two groups (source: Wikipedia)

kruskal_results <- healthy_disease %>%
  summarise(Chao1 = kruskal.test(healthy_disease$chao1 ~ healthy_disease$condition)$p.value,
            Margalef = kruskal.test(healthy_disease$margalef ~ healthy_disease$condition)$p.value,
            Menhinick = kruskal.test(healthy_disease$menhinick ~ healthy_disease$condition)$p.value,
            Fisher = kruskal.test(healthy_disease$fisher_alpha ~ healthy_disease$condition)$p.value,
            Faith = kruskal.test(healthy_disease$faith_pd ~ healthy_disease$condition)$p.value,
            Gini = kruskal.test(healthy_disease$gini_index ~ healthy_disease$condition)$p.value,
            Strong = kruskal.test(healthy_disease$strong ~ healthy_disease$condition)$p.value,
            Pielou = kruskal.test(healthy_disease$pielou_evenness ~ healthy_disease$condition)$p.value,
            Shannon = kruskal.test(healthy_disease$shannon_entropy ~ healthy_disease$condition)$p.value,
            Sipson = kruskal.test(healthy_disease$simpson ~ healthy_disease$condition)$p.value)

kruskal_results_df <- as.data.frame(t(kruskal_results))
colnames(kruskal_results_df) <- c("p.value")
kruskal_results_df <- data.frame(alpha_metric = row.names(kruskal_results_df), kruskal_results_df)
kruskal_results_df$p.value <- round(kruskal_results_df$p.value, digits = 5)


kruskal_results_df %>% 
  add_column(p.adjusted = round(p.adjust(kruskal_results_df$p.value, "fdr"), digits = 5), .after='p.value') %>%
  arrange(p.value)  %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
   bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Kruskal-Wallis test for differentiation of alpha diversity indices across different conditions")

Results of the Kruskal-Wallis test for differentiation of alpha diversity indices across different conditions

alpha_metric

p.value

p.adjusted

Chao1

0.00000

0.00000

Margalef

0.00000

0.00000

Menhinick

0.00000

0.00000

Fisher

0.00000

0.00000

Faith

0.00000

0.00000

Gini

0.00000

0.00000

Strong

0.00000

0.00000

Pielou

0.01456

0.01820

Shannon

0.02423

0.02692

Sipson

0.75092

0.75092

Kruskal-Wallis test shows similar results as Mann-Withney-Wilcoxon test, except that in this test Shannon entropy is also not significantly different in healthy vs unhealthy comparison. The downside of this analysis is the small sample size of IBD dataset (UC and CD).

Longitudinal Chron’s disease analysis

Since previous analysis consisted from small sample size for IBD dataset and Chron’s disease samples showed unexpectedly high alpha diversity in various indexes, lets incorporate another study, this time only with CD patients. The sample size of this data set is 646. The number of patients and controls is balanced and controls are close relatives of patients. Each patient was sampled multiple times during the period of 6 weeks. Some patients previously underwent a surgical intervention on some part of their digestive system.

table(CD$description, CD$condition)
##                           
##                            control crohns
##   father family 01-00010        27      0
##   father family 01-00011        20      0
##   father family 01-00012        54      0
##   father family 01-00013        27      0
##   fecal sample index pt 20       0     18
##   fecal sample index pt 21       0     18
##   fecal sample index pt 23       0     17
##   fecal sample index pt 24       0     23
##   fecal sample index pt 25       0     13
##   fecal sample index pt 26       0     16
##   fecal sample index pt 27       0     10
##   fecal sample index pt 31       0     10
##   fecal sample index pt 32       0     18
##   fecal sample index pt 33       0     19
##   fecal sample index pt 35       0     11
##   fecal sample index pt 36       0      5
##   fecal sample index pt 37       0     21
##   fecal sample index pt 39       0     21
##   index family 01-00012          0     47
##   index family 01-00013          0     26
##   mother family 01-00010        25      0
##   mother family 01-00011        23      0
##   mother family 01-00012        57      0
##   mother family 01-00013        27      0
##   sibling family 01-00010       18      0
##   sibling family 01-00012       55      0
##   sibling family 01-00013       20      0
table(CD$condition)
## 
## control  crohns 
##     353     293
table(CD$surgery_and_ibd)
## 
##          control           crohns crohns (surgery) 
##              353              159              134

Distribution and mean differences between conditions

Lets do Violin plot to see the difference in alpha diversity between cases and controls in this study:

violin_CDa <- vector('list', length(metric))

# Use violin function
violin_CDa <- plot_violin(CD, "condition")

grid.arrange(violin_CDa[[1]], violin_CDa[[2]], violin_CDa[[3]], violin_CDa[[4]], violin_CDa[[5]], violin_CDa[[6]], violin_CDa[[7]],  violin_CDa[[9]], violin_CDa[[10]], ncol=4, top = textGrob("Distributions of 1) Shannon's index  2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in longitudinal CD datasets", gp=gpar(fontsize=10,font=2))) 

Lets now compare alpha diversity between cases and controls but taking into account if they underwent surgery:

violin_CDb <- vector('list', length(metric))

# Use violin function
violin_CDb <- plot_violin(CD, "surgery_and_ibd")

#violin_CDb

grid.arrange(violin_CDb[[1]], violin_CDb[[2]], violin_CDb[[3]], violin_CDb[[4]], violin_CDb[[5]], violin_CDb[[6]], violin_CDb[[7]], violin_CDb[[8]], violin_CDb[[9]], violin_CDb[[10]], ncol=3) 

Lets plot differences in effect of different surgical interventions:

violin_CD_surg <- vector('list', length(metric))

# Use violin function
violin_CD_surg <- plot_violin(CD, "surgery_type")

violin_CD_surg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

Finally, lets compare cases and control from this study with cases from previous IBD studies and AGP controls:

CD_1 <- healthy_disease[healthy_disease$condition != "UC",]

CD_surg <- CD
CD_surg$condition <- NULL
names(CD_surg)[names(CD_surg) == 'surgery_and_ibd'] <- 'condition'

CD_check <- rbind.fill(CD_1, CD_surg)

CD_check$condition[CD_check$condition == "CD"] <-'CD_1'
CD_check$condition[CD_check$condition == "crohns"] <-'CD_2'
CD_check$condition[CD_check$condition == "crohns (surgery)"] <-'CD_surgery'
CD_check$condition[CD_check$condition == "healthy"] <-'control(AGP)'
CD_check$condition[CD_check$condition == "control"] <-'control_2'
violin_CD_check <- vector('list', length(metric))

# Use violin function
violin_CD_check <- plot_violin(CD_check, "condition")

violin_CD_check
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

Here we can see that alpha diversity of CD samples from previous data set have similar mean values as CD samples for all alpha indexes except from Faith’s PD which is much higher and Gini index which is much lower than in this longitudinal study. On the other side, CD samples with past surgery had much lower mean in all measures.

This probably mean that high diversity of CD samples is not a mistake, those are probably just samples with better clinical status.

When it comes to samples with previous surgery, it is unclear whether the lower alpha diversity comes from the severity of CD symptoms or as a direct consequence of surgery (no info about the time that passed after surgery).

test_CD_1 <- list()
test_CD <- list()

test_CD_1 <- do_wilcox_test(CD_1, "condition")
test_CD <- do_wilcox_test(CD, "surgery_and_ibd")

table_CD_1 <- test_CD_1 %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD_1$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD_1$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value, parameter)  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Crohns vs controls in CD_1 dataset")

table_CD_2 <- test_CD %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1 == "crohns")  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Crohns vs controls in CD dataset")

table_CD_3 <- test_CD %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1 != "crohns" & group2 == "control")  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Crohns (surgery) vs controls in CD dataset")

Results of the Mann-Whitney-Wilcoxon test for diversity distributions of healthy and CD samples in first study:

table_CD_1

Crohns vs controls in CD_1 dataset

parameter

group1

group2

p.value

p.adjusted

gini_index

CD

healthy

0.000000000000000002104898

0.00000000000000002104898

chao1

CD

healthy

0.000000003145739791286853

0.00000001572869895643426

margalef

CD

healthy

0.000000044123603489938707

0.00000014707867829979570

strong

CD

healthy

0.000000093943371912849768

0.00000023485842978212443

faith_pd

CD

healthy

0.000004317708939790003593

0.00000863541787958000719

fisher_alpha

CD

healthy

0.000067588438353935822740

0.00011264739725655971360

pielou_evenness

CD

healthy

0.008314214349516924409955

0.01187744907073846369061

menhinick

CD

healthy

0.071993652141507089026184

0.08999206517688386475218

shannon_entropy

CD

healthy

0.149845365558169796305066

0.16649485062018867798095

simpson

CD

healthy

0.753569252121400134925011

0.75356925212140013492501

long_CD_table <- test_CD %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD$p.value, "fdr"),  .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1 == "crohns")
write_xlsx(long_CD_table, here("03_plots_and_tables", "long_CD_table.xlsx"))

This table shows that, in first study, half of the indices don’t show significant difference between healthy and CD samples (Pielou, Menhinick, Strong, Shannon and Simpson).

Results of the Mann-Whitney-Wilcoxon test for diversity distributions of healthy and CD samples in second (longitudinal) study:

table_CD_2

Crohns vs controls in CD dataset

parameter

group1

group2

p.value

p.adjusted

simpson

crohns

control

0.00007140015

0.0001071002

pielou_evenness

crohns

control

0.00010339568

0.0001477081

shannon_entropy

crohns

control

0.00067486314

0.0009202679

chao1

crohns

control

0.00073712337

0.0009614653

strong

crohns

control

0.00086584638

0.0010823080

gini_index

crohns

control

0.00925984432

0.0106844358

faith_pd

crohns

control

0.04426121654

0.0491791295

margalef

crohns

control

0.06701766893

0.0670176689

menhinick

crohns

control

0.06701766893

0.0670176689

fisher_alpha

crohns

control

0.06701766893

0.0670176689

long_CD_table <- test_CD %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1 == "crohns")
write_xlsx(long_CD_table, here("03_plots_and_tables", "long_CD_table.xlsx"))

Helathy and CD samples without surgery are different in all except three alpha diversity indices: Margalef, Menhinick and Fisher alpha.

table_CD_3

Crohns (surgery) vs controls in CD dataset

parameter

group1

group2

p.value

p.adjusted

chao1

crohns (surgery)

control

0.000000000000000000000001390898

0.00000000000000000000004172695

gini_index

crohns (surgery)

control

0.000000000000000000000021437087

0.00000000000000000000032155630

shannon_entropy

crohns (surgery)

control

0.000000000000000000000042004076

0.00000000000000000000042004076

margalef

crohns (surgery)

control

0.000000000000000000001230189986

0.00000000000000000000615094993

menhinick

crohns (surgery)

control

0.000000000000000000001230189986

0.00000000000000000000615094993

fisher_alpha

crohns (surgery)

control

0.000000000000000000001230189986

0.00000000000000000000615094993

faith_pd

crohns (surgery)

control

0.000000000000000000104311853500

0.00000000000000000044705080072

simpson

crohns (surgery)

control

0.000000000000000000127227590168

0.00000000000000000047710346313

pielou_evenness

crohns (surgery)

control

0.000000000000000016486946949426

0.00000000000000005495648983142

strong

crohns (surgery)

control

0.000000000090373423236257942565

0.00000000022593355809064484349

long_CD_surg_table <- test_CD %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD$p.value, "fdr"),  .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1 != "crohns" & group2 == "control")
write_xlsx(long_CD_surg_table, here("03_plots_and_tables", "long_CD_surg_table.xlsx"))

All metrics show significant difference between healthy and Crohn’s samples that undergone some type of surgical procedure.

CD_check_w <- CD_check %>% filter(CD_check$condition != "control(AGP)" & CD_check$condition != "control_2" )

test_CD_3 <- list()

test_CD_3 <- do_wilcox_test(CD_check_w, "condition")

table_CD_3 <- test_CD_3 %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD_3$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD_3$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value, parameter)  %>%
  filter(group1 != "CD_surgery") 

table_CD_3%>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients")

Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients

parameter

group1

group2

p.value

p.adjusted

gini_index

CD_2

CD_1

0.00000000000002008422

0.0000000000003012633

strong

CD_2

CD_1

0.00000000213826526466

0.0000000053456631617

faith_pd

CD_2

CD_1

0.00000006296477586863

0.0000001349245197185

menhinick

CD_2

CD_1

0.00000097390602158399

0.0000018260737904700

pielou_evenness

CD_2

CD_1

0.00015276764244923741

0.0002412120670251117

chao1

CD_2

CD_1

0.00586100874664171982

0.0073262609333021502

margalef

CD_2

CD_1

0.01064628192747416047

0.0127755383129689922

simpson

CD_2

CD_1

0.22596460381655775196

0.2421049326605975716

fisher_alpha

CD_2

CD_1

0.42370617751081035562

0.4383167353560107338

shannon_entropy

CD_2

CD_1

0.68844012013527189353

0.6884401201352718935

write_xlsx(table_CD_3, here("03_plots_and_tables", "long_CD_vs_CD1_table.xlsx"))
test_CD_check <- list()

test_CD_check <- do_wilcox_test(CD_check, "condition")

AGP_control_long_table <- test_CD_check %>% 
  # add_column(p.adjusted = round(p.adjust(test_CD_check$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CD_check$p.value, "fdr"),  .after='p.value') %>%
  filter(group1 == "control(AGP)" & group2 == "control_2") %>%
  arrange(p.value, parameter)  

AGP_control_long_table%>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "AGP vs controls in CD_2 dataset")

AGP vs controls in CD_2 dataset

parameter

group1

group2

p.value

p.adjusted

gini_index

control(AGP)

control_2

0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001934422

0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001934422

menhinick

control(AGP)

control_2

0.0000000000000000000000000000023984799284316375654042500282833123158000659945240377880158138418483444851111497687912166298929150798358023166656494140625000000000000000000000000000000000000000000

0.00000000000000000000000000002664977698257375119381337731142027159457726103579468584915191220159797375359109206094387900520814582705497741699218750000000000000000000000000000000000000000000000

fisher_alpha

control(AGP)

control_2

0.0000000000004851275593146681991773260649741962959383134723623243189649656414985656738281250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.00000000000186587522813333935188103362625707632924121348594326263992115855216979980468750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

margalef

control(AGP)

control_2

0.0000000005605497794567362546094957924261784065755875872127944603562355041503906250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.00000000143730712681214446529092336024124171300897501168947201222181320190429687500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

chao1

control(AGP)

control_2

0.0000074955555826157725053042295282335061301637324504554271697998046875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.00001315009751336100403862512664421302588380058296024799346923828125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

faith_pd

control(AGP)

control_2

0.0004513886219967887052616217768985507063916884362697601318359375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.00066380679705410107222657289938183566846419125795364379882812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

simpson

control(AGP)

control_2

0.2080641727918373096173354497295804321765899658203125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.22615670955634489813768084331968566402792930603027343750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

pielou_evenness

control(AGP)

control_2

0.4135023426013169078885312046622857451438903808593750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.43526562379085986798088470095535740256309509277343750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

shannon_entropy

control(AGP)

control_2

0.5290340029453185488605981845466885715723037719726562500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.54539587932507060941134113818407058715820312500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

strong

control(AGP)

control_2

0.7743176834418382670222058550280053168535232543945312500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

0.77431768344183826702220585502800531685352325439453125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

write_xlsx(AGP_control_long_table, here("03_plots_and_tables", "AGP_control_long_table.xlsx"))

This table confirms that CD samples (without surgery) from two studies significantly differ in all except from Simpson, Fisher and Shannon index. Also healthy controls from longitudinal study are significantly different from AGP healthy population in all metrics except Pielou, Simpson and Shannon index. This tells us that we probably shouldn’t mix these datasets since they show different trends. The reason is probably the heterogeneity of IBD conditions and severity level which is not properly annotated.

Now, lets merge all data sets with IBD and AGP as control:

CD_merge <- CD %>%
  filter(condition != "not applicable")

CD_merge$condition[CD_merge$condition=="control"] <- "healthy"
CD_merge$condition[CD_merge$condition=="crohns"] <- "CD"

# healthy_disease_2 <- rbind.fill(healthy_disease, before_trans, CD_merge)
healthy_disease_2 <- rbind.fill(all_healthy, IBD, UC, CD_merge)

# Sizes of each dataset
table(healthy_disease_2$condition)
## 
##      CD healthy      UC 
##     319    1823      40
# Do Mann-Whitney-Wilcoxon test to see which parameters show significant differenc between healthy and unhealthy samples
wilcox_p_value <- healthy_disease_2 %>%
  summarise(Shannon = wilcox.test(shannon_entropy[condition == "healthy"], shannon_entropy[condition != "healthy"])$p.value,
            Chao1 = wilcox.test(chao1[condition == "healthy"], chao1[condition != "healthy"])$p.value,
            Menhinick = wilcox.test(menhinick[condition == "healthy"], menhinick[condition != "healthy"])$p.value,
            Margalef = wilcox.test(margalef[condition == "healthy"], margalef[condition != "healthy"])$p.value,
            Simpson = wilcox.test(simpson[condition == "healthy"], simpson[condition != "healthy"])$p.value,
            Fisher = wilcox.test(fisher_alpha[condition == "healthy"], fisher_alpha[condition != "healthy"])$p.value,
            Pielou = wilcox.test(pielou_evenness[condition == "healthy"], pielou_evenness[condition != "healthy"])$p.value,
            Gini = wilcox.test(gini_index[condition == "healthy"], gini_index[condition != "healthy"])$p.value,
            Strong = wilcox.test(strong[condition == "healthy"], strong[condition != "healthy"])$p.value,
            Faith = wilcox.test(faith_pd[condition == "healthy"], faith_pd[condition != "healthy"])$p.value) 

wilcox_p_value <- t(wilcox_p_value)
colnames(wilcox_p_value) <- c("p.value")
wilcox_p_value <- data.frame(Alpha_Metric = row.names(wilcox_p_value), wilcox_p_value)
# wilcox_p_value$p.value <- round(wilcox_p_value$p.value, digits = 16)

wilcox_p_value %>%
  #add_column(p.adjusted = round(p.adjust(wilcox_p_value$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(wilcox_p_value$p.value, "fdr"), .after='p.value') %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
  bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different parameters between healthy and unhealthy samples")

Results of the Mann-Whitney-Wilcoxon test for distributions of different parameters between healthy and unhealthy samples

Alpha_Metric

p.value

p.adjusted

Shannon

0.000000000000000000065784344315194204105244370501992371969607520209976102331175806803287287038983777165412902832031250000000000

0.00000000000000000009397763473599172186992940820573540018513390617936525202498476705414987009135074913501739501953125000000000

Chao1

0.000000000000000000000000000000000000000000000000000231468800371495875386859776084205817131085035601296155766098439482347906015

0.00000000000000000000000000000000000000000000000000115734400185747941402636038729524294297277779348287591250613998498964955460

Menhinick

0.000000000000000000000000000000000000000000004503987434451983173853929834306851567120250354183574027764461413247232232967750751

0.00000000000000000000000000000000000000000001443819526327171816615264962755006263793337941768713241655656295244225143973407448

Margalef

0.000000000000000000000000000000000000000000006180040293912128986485846942530441638292110880718768190367664253153963295946546445

0.00000000000000000000000000000000000000000001443819526327171816615264962755006263793337941768713241655656295244225143973407448

Simpson

0.000000000005097340396796981373269619648023583883926501680861065324279479682445526123046875000000000000000000000000000000000000

0.00000000000637167549599622691853541629660850204031063981346960645169019699096679687500000000000000000000000000000000000000000

Fisher

0.000000000000000000000000000000000000000000007219097631635859083076324813775031318966689708843566208278281476221125719867037241

0.00000000000000000000000000000000000000000001443819526327171816615264962755006263793337941768713241655656295244225143973407448

Pielou

0.000000122060289277866629082845099235621333377821429166942834854125976562500000000000000000000000000000000000000000000000000000

0.00000013562254364207403525535895489478876996258804865647107362747192382812500000000000000000000000000000000000000000000000000

Gini

0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005265912

0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005265912

Strong

0.000707463921302816777653132973568972374778240919113159179687500000000000000000000000000000000000000000000000000000000000000000

0.00070746392130281677765313297356897237477824091911315917968750000000000000000000000000000000000000000000000000000000000000000

Faith

0.000000000000000000000028176643473415149740782422820494754035965515132947661625919741346102398438233649358153343200683593750000

0.00000000000000000000004696107245569191819379462273797043667450342059198889907430883019698319991164225939428433775901794433594

Kruskal-Wallis test

kruskal_results <- healthy_disease_2 %>%
  summarise(Shannon = kruskal.test(healthy_disease_2$shannon_entropy ~ healthy_disease_2$condition)$p.value,
  Chao1 = kruskal.test(healthy_disease_2$chao1 ~ healthy_disease_2$condition)$p.value,
  Fisher = kruskal.test(healthy_disease_2$fisher_alpha ~ healthy_disease_2$condition)$p.value,
  Margalef =kruskal.test(healthy_disease_2$margalef ~ healthy_disease_2$condition)$p.value,
  Simpson = kruskal.test(healthy_disease_2$simpson ~ healthy_disease_2$condition)$p.value,
  Menhinick = kruskal.test(healthy_disease_2$menhinick ~ healthy_disease_2$condition)$p.value,
  Pielou = kruskal.test(healthy_disease_2$pielou_evenness ~ healthy_disease_2$condition)$p.value,
  Gini = kruskal.test(healthy_disease_2$gini_index ~ healthy_disease_2$condition)$p.value,
  Strong = kruskal.test(healthy_disease_2$strong ~ healthy_disease_2$condition)$p.value,
  Faith = kruskal.test(healthy_disease_2$faith_pd ~ healthy_disease_2$condition)$p.value)

kruskal_results <- as.data.frame(t(kruskal_results))
colnames(kruskal_results) <- c("p.value")
kruskal_results <- data.frame(Alpha_Metric = row.names(kruskal_results), kruskal_results)
#kruskal_results$p.value <- round(kruskal_results$p.value, digits = 16)

kruskal_results %>% 
  add_column(p.adjusted = round(p.adjust(kruskal_results$p.value, "fdr"), digits=16), .after='p.value') %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
   bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Kruskal-Wallis test for differentiation of different parameters across different conditions")

Results of the Kruskal-Wallis test for differentiation of different parameters across different conditions

Alpha_Metric

p.value

p.adjusted

Shannon

0.0000000000000000004246392780102121776568299753056980616528869404777005516771204440829023951664566993713378906250000000000000000

0.0000000000000000

Chao1

0.0000000000000000000000000000000000000000000000000004178813223213950968239993771739065953709147735605189849962132530945303923853

0.0000000000000000

Fisher

0.0000000000000000000000000000000000000000000424794544091980701140768905080238012056356238650254868457075874633936818755689692854

0.0000000000000000

Margalef

0.0000000000000000000000000000000000000000000203650524830179460521856080144402195805531725932174280345676552152558569720295201519

0.0000000000000000

Simpson

0.0000000000079879426538944013665214166571736835800732201562368572922423481941223144531250000000000000000000000000000000000000000

0.0000000000099849

Menhinick

0.0000000000000000000000000000000000000000000724877375776163258671096021375579612228253185600893242357738360660555264190868227655

0.0000000000000000

Pielou

0.0000000020879295446432806206211553939683031599905405073513975366950035095214843750000000000000000000000000000000000000000000000

0.0000000023199217

Gini

0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002846271

0.0000000000000000

Strong

0.0000000098713771800091446228245378193520698228979881605482660233974456787109375000000000000000000000000000000000000000000000000

0.0000000098713772

Faith

0.0000000000000000000000617041646894981016488012868002505320408659488983982960739137733907622163087580702267587184906005859375000

0.0000000000000000

As expected, if we mix data sets the results show significant difference between healthy and unhealthy samples for all metrics. However, this conclusion can be due to the heterogeniety of data.

CDI data

Let’s see how Clostridium difficile infection affects the alpha diversity of gut microbiome:

table(CDI$PPI_use)
## 
##  No Yes 
##  53  20
table(CDI$prior_antibiotics)
## 
##  No Yes 
##  32  41
table(CDI$response_to_treatment)
## 
## failure success 
##      10      63
table(CDI$recurrence)
## 
##  No Yes 
##  54  19
table(CDI$severe_CDI)
## 
##  No Yes 
##  68   5
#install.packages("expss")
library(expss)

cross_cases(CDI, severe_CDI, prior_antibiotics)
 prior_antibiotics 
 No   Yes 
 severe_CDI 
   No  31 37
   Yes  1 4
   #Total cases  32 41
cross_cases(CDI, severe_CDI, response_to_treatment)
 response_to_treatment 
 failure   success 
 severe_CDI 
   No  8 60
   Yes  2 3
   #Total cases  10 63
cross_cases(CDI, severe_CDI, recurrence)
 recurrence 
 No   Yes 
 severe_CDI 
   No  49 19
   Yes  5
   #Total cases  54 19
cross_cases(CDI, response_to_treatment, recurrence)
 recurrence 
 No   Yes 
 response_to_treatment 
   failure  5 5
   success  49 14
   #Total cases  54 19

Check whether increased BMI affects the alpha diversity score:

CDI$condition <- "CDI"

for (i in 1:nrow(CDI)){
  if (CDI$BMI[i] < 25){
    CDI$BMI_cat[i] <- "normal"
  } else {
    CDI$BMI_cat[i] <- "obese"
  } 
}

table(CDI$BMI_cat)
## 
## normal  obese 
##     28     45

More than half samples have BMI over 25 which classifies them in obese group. However, lets chech whether there is significant difference in alpha diversity regarding the BMI cathegory:

violin_CDI_BMI <- vector('list', length(metric))

# Use violin function
violin_CDI_BMI <- plot_violin(CDI, "BMI_cat")

#violin_CDb

grid.arrange(violin_CDI_BMI[[1]], violin_CDI_BMI[[2]], violin_CDI_BMI[[3]], violin_CDI_BMI[[4]], violin_CDI_BMI[[5]], violin_CDI_BMI[[6]], violin_CDI_BMI[[7]], violin_CDI_BMI[[8]], violin_CDI_BMI[[9]], violin_CDI_BMI[[10]], ncol=3) 

test_CDI_BMI <- list()

test_CDI_BMI <- do_wilcox_test(CDI, "BMI_cat")

test_CDI_BMI <- test_CDI_BMI %>% 
  #add_column(p.adjusted = round(p.adjust(test_CDI_BMI$p.value, "fdr"), digits=5), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CDI_BMI$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value, parameter)  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for...")

test_CDI_BMI

Results of the Mann-Whitney-Wilcoxon test for...

parameter

group1

group2

p.value

p.adjusted

chao1

obese

normal

0.2425855

0.6176630

fisher_alpha

obese

normal

0.2470652

0.6176630

margalef

obese

normal

0.2470652

0.6176630

menhinick

obese

normal

0.2470652

0.6176630

faith_pd

obese

normal

0.5166215

0.9570358

gini_index

obese

normal

0.7225394

0.9570358

shannon_entropy

obese

normal

0.7395029

0.9570358

simpson

obese

normal

0.8260538

0.9570358

strong

obese

normal

0.8613322

0.9570358

pielou_evenness

obese

normal

0.9955044

0.9955044

Obese and normal samples do not diverge significantly in alpha diversity, so there is no need to discard more than half of the samples due to their increased BMI. Now lets compare CDI samples with AGP control:

CDI_check <- rbind.fill(CDI, all_healthy)

violin_CDI_healthy <- vector('list', length(metric))

# Use violin function
violin_CDI_healthy <- plot_violin(CDI_check, "condition")

#violin_CDb

grid.arrange(violin_CDI_healthy[[1]], violin_CDI_healthy[[2]], violin_CDI_healthy[[3]], violin_CDI_healthy[[4]], violin_CDI_healthy[[5]], violin_CDI_healthy[[6]], violin_CDI_healthy[[7]], violin_CDI_healthy[[8]], violin_CDI_healthy[[9]], violin_CDI_healthy[[10]], ncol=3) 

test_CDI_healthy <- list()

test_CDI_healthy <- do_wilcox_test(CDI_check, "condition")

test_CDI_healthy <- test_CDI_healthy %>% 
  # add_column(p.adjusted = round(p.adjust(test_CDI_healthy$p.value, "fdr"), digits=16), .after='p.value') %>%
  add_column(p.adjusted = p.adjust(test_CDI_healthy$p.value, "fdr"), .after='p.value') %>%
  arrange(p.value, parameter)  

test_CDI_healthy %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients")

Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients

parameter

group1

group2

p.value

p.adjusted

gini_index

healthy

CDI

0.00000000000000000000000000000000000000000000002918288

0.0000000000000000000000000000000000000000000002918288

faith_pd

healthy

CDI

0.00000000000000000000000000000000000000000000009097694

0.0000000000000000000000000000000000000000000004548847

chao1

healthy

CDI

0.00000000000000000000000000000000000000000000269230241

0.0000000000000000000000000000000000000000000089743414

menhinick

healthy

CDI

0.00000000000000000000000000000000000000000001406622205

0.0000000000000000000000000000000000000000000351655551

fisher_alpha

healthy

CDI

0.00000000000000000000000000000000000000000008065958035

0.0000000000000000000000000000000000000000001613191607

margalef

healthy

CDI

0.00000000000000000000000000000000000000000010647793214

0.0000000000000000000000000000000000000000001774632202

shannon_entropy

healthy

CDI

0.00000000000000000000975996936499686882261238201699592

0.0000000000000000000139428133785669571804551389414548

simpson

healthy

CDI

0.00000000000775175203222223986415110395186320551945414

0.0000000000096896900402778002340856634129870512701227

pielou_evenness

healthy

CDI

0.00003647992967017024066840938378852854384604142978787

0.0000405332551890780459400397128799653501118882559240

strong

healthy

CDI

0.99645701382671347801078809425234794616699218750000000

0.9964570138267134780107880942523479461669921875000000

write_xlsx(test_CDI_healthy, here("03_plots_and_tables", "CDI_AGP.xlsx"))

Means of all alpha diversity indices of CDI samples are significantly different from means of the healthy control population. Based on violin plots, both richness and evenness indices show lower values in CDI samples.

Short- and Longterm changes after fecal transplantation for recurrent CDI

With this data set of rnrow(CDI_FMT)` samples we will explore whether there FMT for recurrent CDI affects the improvement of alpha diversity. Samples from 4 patients were collected in multiple time points after transplantation

table(CDI_FMT$disease_state)
## 
## post-FMT  Pre-FMT 
##       88        4
table(CDI_FMT$animations_subject)
## 
## CD1 CD2 CD3 CD4 
##  33  22  13  24
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD1"] <-'subject_1'
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD2"] <-'subject_2'
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD3"] <-'subject_3'
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD4"] <-'subject_4'
progression <- vector('list', length(metric))

for (i in 1:length(metric)){
  progression[[i]] <- CDI_FMT %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]], color= CDI_FMT$animations_subj)) +
    geom_line()+
    geom_point(size=1)+
    ylab(label_fun(metric[i]))+
    xlab("Day since FMT")+
    facet_wrap(dplyr::vars(CDI_FMT$animations_subject), ncol=2)
}

progression
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

CDI_FMT_a <- CDI_FMT

for(i in 1:nrow(CDI_FMT_a)) {
  if(CDI_FMT_a$day_since_fmt[i] < 1 ){
      CDI_FMT_a$day_since_fmt[i] <- -1
  }
}
  
for (i in 1:length(metric)) {
  progression[[i]] <- CDI_FMT_a %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]], color =CDI_FMT$animations_subj)) +
    geom_line()+
    geom_point(size=1) +
    geom_vline(xintercept = 0, linetype = "dashed")+ 
    theme_classic() +
    theme(legend.position="none", axis.title.y=element_text(size=10), axis.title.x=element_text(size=15), title =element_text(size=10)) +
    # ggtitle(label_fun(metric[i]))+
    # ylab("")+
    ylab(label_fun(metric[i]))+
    xlab("")+
    scale_color_manual(values=c('#a6611a','#dfc27d','#80cdc1','#018571'))
  }

grid.arrange(progression[[2]], progression[[7]], ncol=1, bottom = textGrob("Day since FMT", gp = gpar(fontsize = 15)))

grid.arrange(progression[[2]], progression[[7]], progression[[9]], ncol=1, bottom = textGrob("Day since FMT", gp = gpar(fontsize = 12)))

# grid.arrange(progression[[1]], progression[[2]], progression[[3]], progression[[4]], progression[[5]], progression[[6]], ncol=2)
# grid.arrange(progression[[7]], progression[[8]], progression[[9]], progression[[10]], ncol=2, nrow=3)
for (i in 1:length(metric)) {
  progression[[i]] <- CDI_FMT[CDI_FMT$animations_subject == "subject_1",] %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]])) +
    geom_line(color='#018571')+
    geom_point(size=1, color='#018571') +
    theme_classic() +
    theme(legend.position="none", axis.title.y=element_text(size=12), axis.title.x=element_text(size=12)) +
    ylab(label_fun(metric[i]))+
    xlab("")
    #scale_color_brewer(palette="Set3")
    #scale_color_manual(values=c('#a6611a','#dfc27d','#80cdc1','#018571'))
  }

grid.arrange(progression[[2]], progression[[7]], ncol=1)

grid.arrange(progression[[1]], progression[[2]], progression[[3]], progression[[4]], progression[[5]], progression[[6]], ncol=3)

grid.arrange(progression[[7]], progression[[8]], progression[[9]], progression[[10]], ncol=3, bottom = textGrob("Day since FMT", gp = gpar(fontsize = 12)))

#all_tables_next <- data.frame(table1(~ t_probability | condition, data=t_test_results, overall=F, render.continuous=c("Mean (Min, Max)"="MEAN (MIN, MAX)")))
table1(~ chao1 + margalef + menhinick + fisher_alpha + faith_pd + gini_index + strong + pielou_evenness + shannon_entropy + simpson | animations_subject, data=CDI_FMT, overall=F, render.continuous=c("Min"="MIN", "Max"="MAX", "percent. difference" = "((MAX-MIN)/MIN)*100"))
subject_1
(N=33)
subject_2
(N=22)
subject_3
(N=13)
subject_4
(N=24)
chao1
Min 47.5 113 51.2 48.1
Max 287 325 226 282
percent. difference ((287-47.5)/47.5)*100 ((325-113)/113)*100 ((226-51.2)/51.2)*100 ((282-48.1)/48.1)*100
margalef
Min 4.06 6.86 4.89 4.37
Max 22.2 25.0 20.1 23.1
percent. difference ((22.2-4.06)/4.06)*100 ((25.0-6.86)/6.86)*100 ((20.1-4.89)/4.89)*100 ((23.1-4.37)/4.37)*100
menhinick
Min 0.327 0.547 0.392 0.351
Max 1.75 1.97 1.58 1.82
percent. difference ((1.75-0.327)/0.327)*100 ((1.97-0.547)/0.547)*100 ((1.58-0.392)/0.392)*100 ((1.82-0.351)/0.351)*100
fisher_alpha
Min 5.00 9.04 6.15 5.43
Max 35.4 40.8 31.4 37.1
percent. difference ((35.4-5.00)/5.00)*100 ((40.8-9.04)/9.04)*100 ((31.4-6.15)/6.15)*100 ((37.1-5.43)/5.43)*100
faith_pd
Min 6.29 10.9 7.65 7.18
Max 21.6 25.1 19.9 23.9
percent. difference ((21.6-6.29)/6.29)*100 ((25.1-10.9)/10.9)*100 ((19.9-7.65)/7.65)*100 ((23.9-7.18)/7.18)*100
gini_index
Min 0.991 0.990 0.992 0.991
Max 0.999 0.999 0.999 0.999
percent. difference ((0.999-0.991)/0.991)*100 ((0.999-0.990)/0.990)*100 ((0.999-0.992)/0.992)*100 ((0.999-0.991)/0.991)*100
strong
Min 0.645 0.685 0.665 0.690
Max 0.856 0.855 0.843 0.765
percent. difference ((0.856-0.645)/0.645)*100 ((0.855-0.685)/0.685)*100 ((0.843-0.665)/0.665)*100 ((0.765-0.690)/0.690)*100
pielou_evenness
Min 0.323 0.414 0.362 0.346
Max 0.746 0.734 0.703 0.694
percent. difference ((0.746-0.323)/0.323)*100 ((0.734-0.414)/0.414)*100 ((0.703-0.362)/0.362)*100 ((0.694-0.346)/0.346)*100
shannon_entropy
Min 1.72 2.51 2.02 1.88
Max 5.62 5.64 5.27 5.41
percent. difference ((5.62-1.72)/1.72)*100 ((5.64-2.51)/2.51)*100 ((5.27-2.02)/2.02)*100 ((5.41-1.88)/1.88)*100
simpson
Min 0.587 0.715 0.659 0.477
Max 0.969 0.970 0.953 0.954
percent. difference ((0.969-0.587)/0.587)*100 ((0.970-0.715)/0.715)*100 ((0.953-0.659)/0.659)*100 ((0.954-0.477)/0.477)*100
subjects <- unique(CDI_FMT$animations_subject)

data <- CDI_FMT[CDI_FMT$animations_subject == subjects[1],]
  
for (i in 1:length(metric)) {
  progression[[i]] <- data %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]])) +
    geom_line(color='#a6611a')+
    geom_point(size=1, color='#a6611a')+
    ylab(label_fun(metric[i]))+
    xlab("Day since FMT")+
    theme_classic() 
}

grid.arrange(progression[[1]], progression[[2]], progression[[3]], progression[[4]], progression[[5]], progression[[6]], progression[[7]], progression[[8]], progression[[9]], progression[[10]], ncol=3)

Even though the value is fluctuating as the time passes, we can see general trend of improvement of alpha diversity after FMT in all subjects.

Fecal transplant data analysis

From the paper: “A recent study suggests significantly lower response of CDI to FMT in patients with underlying inflammatory bowel disease (IBD)”.

The original study is looking at the changes in community composition (taxonomical) in patients after FMT. Lets examine what happens with alpha diversity as the time is passing after transplantation:

table(FMT_IBD_CDI$condition)
## 
##      CDI CDI + CD CDI + UC   Donors 
##       27        6        6        1
table(FMT_IBD_CDI$day_since_fmt)
## 
##       -1       28        7 NA-Donor 
##       14       11       14        1
violin_trans_2 <- vector('list', length(metric))

for (i in 1:length(metric)){
  mean_line <- FMT_IBD_CDI %>% dplyr::group_by(condition, day_since_fmt) %>% dplyr::summarise(grp_mean = mean(.data[[metric[i]]]))

  violin_trans_2[[i]] <- FMT_IBD_CDI %>%  
    mutate(across(day_since_fmt, factor, levels=c("-1","7","28","NA-Donor"))) %>% 
    ggplot(aes(x = .data[[metric[i]]], y = day_since_fmt, color = day_since_fmt, fill = day_since_fmt)) +
    geom_violin()+
    geom_boxplot(width=0.1, color="grey", alpha=0.2) +
    geom_vline(data = mean_line, aes(xintercept = grp_mean, color = day_since_fmt), linetype = "dashed")+ 
    xlab(label_fun(metric[i]))+
    ylab("") +
    facet_wrap(dplyr::vars(condition), nrow=1)+
    theme(legend.position="none") 
  
}

#plots for Shannon entropy
violin_trans_2
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

These plots show consistent trend of improvement of alpha diversity in 7th and 28th day after fecal microbiota transplantation. For all alpha indexes we can see that the diversity is increasing toward donor’s mean value. However, samples with underlying CD show a trend of diversity decrease in 28th day compared to 7th day. This shows that IBD can decrease the efficiency of FMT in CDI recepients.

cond <- c("CDI + UC", "CDI", "CDI + CD")
test_CDI_trans <- list()
table <- list()

for (i in 1:length(cond)){
  FMT_IBD_CDI_1 <- FMT_IBD_CDI %>%
    filter(condition == cond[i])
  
  test_CDI_trans <- do_wilcox_test(FMT_IBD_CDI_1, "day_since_fmt")

  table <- test_CDI_trans %>%
    # add_column(p.adjusted = round(p.adjust(test_CDI_trans$p.value, "fdr"), digits=5), .after='p.value') %>%
    add_column(p.adjusted = p.adjust(test_CDI_trans$p.value, "fdr"), .after='p.value') %>%
    flextable() %>%
    bold(~ p.value < 0.05, 4) %>%
    bold(~ p.adjusted < 0.05, 5) %>%
    add_header_lines(values = paste("Results of the Mann-Whitney-Wilcoxon test for condition:", cond[i], sep = " "))
   
 print(table)
 
 test_CDI_trans <- list()
}

From these three tables we can see that only for CDI patients without underlying IBD show signifficant improvement after FMT.

Conclusions

Based on previous analysis we can draw following conclusions:

Session information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] moments_0.14.1             rstatix_0.7.2              caret_6.0-90               lattice_0.20-45            ROSE_0.0-4                 expss_0.11.4              
##  [7] maditr_0.8.3               table1_1.4.3               tableone_0.13.2            ggplotify_0.1.0            ComplexHeatmap_2.10.0      purrr_1.0.1               
## [13] data.table_1.14.8          readr_2.1.4                stringr_1.5.0              varImp_0.4                 party_1.3-13               strucchange_1.5-3         
## [19] sandwich_3.0-1             modeltools_0.2-23          mvtnorm_1.1-3              measures_0.3               randomForest_4.7-1.1       readxl_1.4.2              
## [25] car_3.1-2                  carData_3.0-5              psych_2.3.3                writexl_1.4.2              here_1.0.1                 RColorBrewer_1.1-3        
## [31] corrplot_0.92              PerformanceAnalytics_2.0.4 xts_0.12.1                 zoo_1.8-9                  nortest_1.0-4              flextable_0.9.1           
## [37] tibble_3.2.1               cowplot_1.1.1              plyr_1.8.8                 gridExtra_2.3              ggplot2_3.4.2              dplyr_1.1.1               
## [43] rmarkdown_2.21             knitr_1.42                
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3              tidyselect_1.2.0        htmlwidgets_1.6.2       pROC_1.18.0             munsell_0.5.0           codetools_0.2-18        ragg_1.2.1             
##   [8] future_1.24.0           withr_2.5.0             colorspace_2.1-0        highr_0.10              uuid_1.1-0              rstudioapi_0.14         officer_0.6.2          
##  [15] fontLiberation_0.1.0    listenv_0.8.0           labeling_0.4.2          mnormt_2.1.1            farver_2.1.1            rprojroot_2.0.2         parallelly_1.30.0      
##  [22] vctrs_0.6.1             generics_0.1.3          TH.data_1.1-0           ipred_0.9-12            xfun_0.38               timechange_0.2.0        fontquiver_0.2.1       
##  [29] R6_2.5.1                doParallel_1.0.17       clue_0.3-64             cachem_1.0.7            gridGraphics_0.5-1      promises_1.2.0.1        scales_1.2.1           
##  [36] multcomp_1.4-18         nnet_7.3-17             gtable_0.3.3            globals_0.14.0          timeDate_3043.102       rlang_1.1.0             systemfonts_1.0.4      
##  [43] GlobalOptions_0.1.2     splines_4.1.2           ModelMetrics_1.2.2.2    checkmate_2.0.0         broom_1.0.4             yaml_2.3.7              reshape2_1.4.4         
##  [50] abind_1.4-5             backports_1.4.1         httpuv_1.6.5            tools_4.1.2             lava_1.6.10             ellipsis_0.3.2          jquerylib_0.1.4        
##  [57] proxy_0.4-26            BiocGenerics_0.40.0     Rcpp_1.0.10             rpart_4.1.16            openssl_2.0.6           GetoptLong_1.0.5        S4Vectors_0.32.4       
##  [64] haven_2.5.2             cluster_2.1.2           survey_4.1-1            crul_1.3                magrittr_2.0.3          circlize_0.4.15         matrixStats_0.61.0     
##  [71] hms_1.1.3               mime_0.12               evaluate_0.20           xtable_1.8-4            IRanges_2.28.0          shape_1.4.6             compiler_4.1.2         
##  [78] fontBitstreamVera_0.1.1 crayon_1.5.2            htmltools_0.5.5         later_1.3.0             tzdb_0.3.0              Formula_1.2-4           tidyr_1.3.0            
##  [85] libcoin_1.0-9           lubridate_1.9.2         DBI_1.1.3               MASS_7.3-55             Matrix_1.4-0            cli_3.6.1               mitools_2.4            
##  [92] quadprog_1.5-8          parallel_4.1.2          gower_1.0.0             forcats_1.0.0           pkgconfig_2.0.3         coin_1.4-2              recipes_0.1.17         
##  [99] xml2_1.3.3              foreach_1.5.2           bslib_0.4.2             prodlim_2019.11.13      yulab.utils_0.0.6       digest_0.6.31           httpcode_0.3.0         
## [106] cellranger_1.1.0        htmlTable_2.4.0         gdtools_0.3.3           curl_5.0.0              shiny_1.7.4             rjson_0.2.21            lifecycle_1.0.3        
## [113] nlme_3.1-155            jsonlite_1.8.4          askpass_1.1             fansi_1.0.4             labelled_2.11.0         pillar_1.9.0            fastmap_1.1.1          
## [120] survival_3.2-13         glue_1.6.2              zip_2.2.0               png_0.1-7               iterators_1.0.14        class_7.3-20            stringi_1.7.12         
## [127] sass_0.4.5              textshaping_0.3.6       gfonts_0.2.0            e1071_1.7-9             future.apply_1.8.1